# Load the serialised data objects this script works with.
emilianto <- readRDS(file = "data/rds/emilianto.rds")
emilianto_attitude <- readRDS(file = "data/rds/emilianto_attitude.rds")
attitudes <- readRDS(file = "data/rds/attitudes.rds")

# Data behind the violin plots of "spaces" proportions.
pa_spaces <- readRDS(file = "data/rds/pa_spaces.rds")
ty_spaces <- readRDS(file = "data/rds/ty_spaces.rds")
# Emilian respondents only; the demographic plots below all use this subset.
emilian <- filter(emilianto, language == "Emilian")

# Gender: respondent counts.
ggplot(data = emilian, mapping = aes(x = gender)) +
  geom_bar()

# Age: histogram (default binning) followed by a density estimate.
ggplot(data = emilian, mapping = aes(x = age)) +
  geom_histogram()
ggplot(data = emilian, mapping = aes(x = age)) +
  geom_density()

# Education: counts, then age against education with jittered points.
ggplot(data = emilian, mapping = aes(x = education)) +
  geom_bar()
ggplot(data = emilian, mapping = aes(x = age, y = education)) +
  geom_jitter(height = 0.2, alpha = 0.5)

# Profession and language background: pre-computed counts, with bars ordered
# from the most to the least frequent category.
ggplot(
  data = count(emilian, profession),
  mapping = aes(x = reorder(profession, -n), y = n)
) +
  geom_col()
ggplot(
  data = count(emilian, languages_family),
  mapping = aes(x = reorder(languages_family, -n), y = n)
) +
  geom_col()
ggplot(
  data = count(emilian, languages_parents),
  mapping = aes(x = reorder(languages_parents, -n), y = n)
) +
  geom_col()
# Emilian: distribution of the `comprehend` variable.
ggplot(data = emilian, mapping = aes(x = comprehend, fill = comprehend)) +
  geom_bar() +
  scale_fill_brewer(type = "div") +
  theme_dark()

# `comprehend` by gender: stacked counts, then within-level proportions.
ggplot(data = emilian, mapping = aes(x = comprehend, fill = gender)) +
  geom_bar()
ggplot(data = emilian, mapping = aes(x = comprehend, fill = gender)) +
  geom_bar(position = "fill")

# Age histograms (5-unit bins), one facet row per `comprehend` level.
ggplot(data = emilian, mapping = aes(x = age, fill = comprehend)) +
  geom_histogram(binwidth = 5) +
  facet_grid(rows = vars(comprehend))

# `comprehend` by profession: stacked counts, then within-level proportions.
ggplot(data = emilian, mapping = aes(x = comprehend, fill = profession)) +
  geom_bar()
ggplot(data = emilian, mapping = aes(x = comprehend, fill = profession)) +
  geom_bar(position = "fill")
# Emilian: distribution of the `speak` variable.
ggplot(data = emilian, mapping = aes(x = speak, fill = speak)) +
  geom_bar() +
  scale_fill_brewer(type = "div")

# `speak` by gender: stacked counts, then within-level proportions.
ggplot(data = emilian, mapping = aes(x = speak, fill = gender)) +
  geom_bar()
ggplot(data = emilian, mapping = aes(x = speak, fill = gender)) +
  geom_bar(position = "fill")

# Age histograms (5-unit bins), one facet row per `speak` level.
ggplot(data = emilian, mapping = aes(x = age, fill = speak)) +
  geom_histogram(binwidth = 5) +
  facet_grid(rows = vars(speak))

# `speak` by profession: stacked counts, then within-level proportions.
ggplot(data = emilian, mapping = aes(x = speak, fill = profession)) +
  geom_bar()
ggplot(data = emilian, mapping = aes(x = speak, fill = profession)) +
  geom_bar(position = "fill")
# Emilian: distribution of the `read_write` variable.
ggplot(data = emilian, mapping = aes(x = read_write, fill = read_write)) +
  geom_bar()

# `read_write` by gender: stacked counts, then within-level proportions.
ggplot(data = emilian, mapping = aes(x = read_write, fill = gender)) +
  geom_bar()
ggplot(data = emilian, mapping = aes(x = read_write, fill = gender)) +
  geom_bar(position = "fill")

# Age histograms (5-unit bins), one facet row per `read_write` level.
# Rows with a missing `read_write` are removed first so they get no facet.
ggplot(
  data = drop_na(emilian, read_write),
  mapping = aes(x = age, fill = read_write)
) +
  geom_histogram(binwidth = 5) +
  facet_grid(rows = vars(read_write))

# `read_write` by profession: stacked counts, then within-level proportions.
ggplot(data = emilian, mapping = aes(x = read_write, fill = profession)) +
  geom_bar()
ggplot(data = emilian, mapping = aes(x = read_write, fill = profession)) +
  geom_bar(position = "fill")
# Emilian: ratings of the items in columns `educated` through `familiar`.
# The items are stacked into long form (one row per respondent and item);
# after the select() only those columns remain, so everything() pivots them all.

# Counts of each rating value, one facet column per item.
emilian %>%
  dplyr::select(educated:familiar) %>%
  pivot_longer(
    cols = everything(),
    names_to = "feature",
    values_to = "rating"
  ) %>%
  ggplot(mapping = aes(x = as.factor(rating), fill = as.factor(rating))) +
  geom_bar() +
  scale_fill_brewer() +
  facet_grid(cols = vars(feature))

# Share of each rating value within every item.
emilian %>%
  dplyr::select(educated:familiar) %>%
  pivot_longer(
    cols = everything(),
    names_to = "feature",
    values_to = "rating"
  ) %>%
  ggplot(mapping = aes(x = feature, fill = as.factor(rating))) +
  geom_bar(position = "fill") +
  scale_fill_brewer()
# Classify each Emilian respondent's birthplace as rural or urban from the
# "-RU" / "-UR" marker contained in `birth_place`. Rows with neither marker
# (or a missing `birth_place`) get NA.
emil_rur <- emilian %>%
  mutate(
    # case_when() with a typed NA keeps `ru_ur` a character column even when
    # no row matches; the first matching condition wins, so "-RU" takes
    # precedence over "-UR".
    ru_ur = case_when(
      str_detect(birth_place, "-RU") ~ "rural",
      str_detect(birth_place, "-UR") ~ "urban",
      TRUE ~ NA_character_
    )
  )
# Modelling-ready table: keep the id, the competence variables, the rating
# items and the rural/urban flag, convert competence and ratings to ordered
# factors, and discard every row that has a missing value in any column.
emil_rur_clean <- emil_rur %>%
  dplyr::select(
    id,
    comprehend, speak, read_write,
    educated:familiar,
    ru_ur
  ) %>%
  mutate(
    # Explicit level order for the two competence scales; values outside
    # these levels become NA and are dropped below.
    across(
      c(comprehend, speak),
      ~ ordered(.x, levels = c("NO", "AL", "50/50", "G", "VG"))
    ),
    across(educated:familiar, as.ordered)
  ) %>%
  drop_na()
# `comprehend` by rural/urban birthplace, leaving out respondents whose
# birthplace could not be classified: stacked counts, then proportions.
ggplot(
  data = drop_na(emil_rur, ru_ur),
  mapping = aes(x = ru_ur, fill = comprehend)
) +
  geom_bar()
ggplot(
  data = drop_na(emil_rur, ru_ur),
  mapping = aes(x = ru_ur, fill = comprehend)
) +
  geom_bar(position = "fill")
# Esperanto respondents only; the demographic plots below all use this subset.
esperanto <- filter(emilianto, language == "Esperanto")

# Gender: respondent counts.
ggplot(data = esperanto, mapping = aes(x = gender)) +
  geom_bar()

# Age: histogram (default binning) followed by a density estimate.
ggplot(data = esperanto, mapping = aes(x = age)) +
  geom_histogram()
ggplot(data = esperanto, mapping = aes(x = age)) +
  geom_density()

# Education: counts, then age against education with jittered points.
ggplot(data = esperanto, mapping = aes(x = education)) +
  geom_bar()
ggplot(data = esperanto, mapping = aes(x = age, y = education)) +
  geom_jitter(height = 0.2, alpha = 0.5)

# Profession and family languages: pre-computed counts, with bars ordered
# from the most to the least frequent category.
ggplot(
  data = count(esperanto, profession),
  mapping = aes(x = reorder(profession, -n), y = n)
) +
  geom_col()
ggplot(
  data = count(esperanto, languages_family),
  mapping = aes(x = reorder(languages_family, -n), y = n)
) +
  geom_col()
# Esperanto: distribution of the `comprehend` variable.
ggplot(data = esperanto, mapping = aes(x = comprehend, fill = comprehend)) +
  geom_bar() +
  scale_fill_brewer(type = "div") +
  theme_dark()

# `comprehend` by gender: stacked counts, then within-level proportions.
ggplot(data = esperanto, mapping = aes(x = comprehend, fill = gender)) +
  geom_bar()
ggplot(data = esperanto, mapping = aes(x = comprehend, fill = gender)) +
  geom_bar(position = "fill")

# Age histograms (5-unit bins), one facet row per `comprehend` level.
ggplot(data = esperanto, mapping = aes(x = age, fill = comprehend)) +
  geom_histogram(binwidth = 5) +
  facet_grid(rows = vars(comprehend))

# `comprehend` by profession: stacked counts, then within-level proportions.
ggplot(data = esperanto, mapping = aes(x = comprehend, fill = profession)) +
  geom_bar()
ggplot(data = esperanto, mapping = aes(x = comprehend, fill = profession)) +
  geom_bar(position = "fill")
# Esperanto: distribution of the `speak` variable.
ggplot(data = esperanto, mapping = aes(x = speak, fill = speak)) +
  geom_bar() +
  scale_fill_brewer(type = "div")

# `speak` by gender: stacked counts, then within-level proportions.
ggplot(data = esperanto, mapping = aes(x = speak, fill = gender)) +
  geom_bar()
ggplot(data = esperanto, mapping = aes(x = speak, fill = gender)) +
  geom_bar(position = "fill")

# Age histograms (5-unit bins), one facet row per `speak` level.
ggplot(data = esperanto, mapping = aes(x = age, fill = speak)) +
  geom_histogram(binwidth = 5) +
  facet_grid(rows = vars(speak))

# `speak` by profession: stacked counts, then within-level proportions.
ggplot(data = esperanto, mapping = aes(x = speak, fill = profession)) +
  geom_bar()
ggplot(data = esperanto, mapping = aes(x = speak, fill = profession)) +
  geom_bar(position = "fill")

# Esperanto: distribution of the `read_write` variable.
ggplot(data = esperanto, mapping = aes(x = read_write, fill = read_write)) +
  geom_bar()
# Esperanto: ratings of the items in columns `educated` through `familiar`.
# The items are stacked into long form and missing ratings are discarded
# while pivoting (the item name itself is never missing, so this removes the
# same rows as a drop_na() after the pivot).

# Counts of each rating value, one facet column per item.
esperanto %>%
  dplyr::select(educated:familiar) %>%
  pivot_longer(
    cols = everything(),
    names_to = "feature",
    values_to = "rating",
    values_drop_na = TRUE
  ) %>%
  ggplot(mapping = aes(x = as.factor(rating), fill = as.factor(rating))) +
  geom_bar() +
  scale_fill_brewer() +
  facet_grid(cols = vars(feature))

# Share of each rating value within every item.
esperanto %>%
  dplyr::select(educated:familiar) %>%
  pivot_longer(
    cols = everything(),
    names_to = "feature",
    values_to = "rating",
    values_drop_na = TRUE
  ) %>%
  ggplot(mapping = aes(x = feature, fill = as.factor(rating))) +
  geom_bar(position = "fill") +
  scale_fill_brewer()
# `dim_1` against age, coloured by language, with one straight-line (lm) fit
# per language.
ggplot(
  data = emilianto_attitude,
  mapping = aes(x = age, y = dim_1, colour = language)
) +
  geom_point() +
  geom_smooth(method = "lm", formula = "y ~ x")
# Coordinates of the Emilian respondents' birthplaces, cached on disk: the
# CSV is read when it already exists, otherwise the unique `birth_place_it`
# values are geocoded and the result is written out for later runs.
if (file.exists("./data/raw/geo.csv")) {
  cat("Reading geocoding...\n")
  geo <- read_csv("./data/raw/geo.csv")
} else {
  # Namespace-qualified like every other select() call in this script, so the
  # call does not break when another attached package masks `select`.
  birth_em <- emilian %>%
    dplyr::select(birth_place_it) %>%
    distinct()
  geo <- geocode(
    birth_em,
    city = birth_place_it,
    method = "osm",
    verbose = TRUE
  )
  # Make sure the cache directory exists before writing into it.
  dir.create(dirname("./data/raw/geo.csv"), recursive = TRUE, showWarnings = FALSE)
  write_csv(geo, file = "./data/raw/geo.csv")
}
## Reading geocoding...
# Map of the geocoded birthplaces: European country outlines with one point
# per geocoded place, cropped to longitude 7-14 and latitude 43-47.
europe <- ne_countries(
  scale = "medium",
  continent = "Europe",
  returnclass = "sf"
)
ggplot() +
  geom_sf(data = europe) +
  geom_point(data = geo, mapping = aes(x = long, y = lat)) +
  coord_sf(xlim = c(7, 14), ylim = c(43, 47))
# For each demographic variable, print a wide table with one row per language
# holding the count (`n_*`) and percentage (`prop_*`) of every category.
columns <- c("gender", "age_2", "profession_2", "languages_family_2")
for (column in columns) {
  emilianto %>%
    group_by(language) %>%
    count(.data[[column]]) %>%
    mutate(
      # The data are still grouped by language here, so each percentage is
      # relative to that language's total.
      prop = round(n / sum(n, na.rm = TRUE) * 100, 1)
    ) %>%
    # `names_from` is a tidyselect argument, where `.data[[column]]` is
    # deprecated; all_of() selects the column named by the string instead.
    pivot_wider(names_from = all_of(column), values_from = c(n, prop)) %>%
    print()
}
## # A tibble: 2 × 7
## # Groups: language [2]
## language n_F n_M `n_LGBTQ+` prop_F prop_M `prop_LGBTQ+`
## <chr> <int> <int> <int> <dbl> <dbl> <dbl>
## 1 Emilian 269 165 NA 62 38 NA
## 2 Esperanto 32 118 4 20.8 76.6 2.6
## # A tibble: 2 × 9
## # Groups: language [2]
## language `n_0-20` `n_21-40` `n_41-60` `n_61+` `prop_0-20` `prop_21-40`
## <chr> <int> <int> <int> <int> <dbl> <dbl>
## 1 Emilian 109 191 106 28 25.1 44
## 2 Esperanto 8 39 53 54 5.2 25.3
## # ℹ 2 more variables: `prop_41-60` <dbl>, `prop_61+` <dbl>
## # A tibble: 2 × 11
## # Groups: language [2]
## language `n_not in work` n_skilled n_student n_unskilled n_NA
## <chr> <int> <int> <int> <int> <int>
## 1 Emilian 45 95 153 138 3
## 2 Esperanto 4 62 20 21 47
## # ℹ 5 more variables: `prop_not in work` <dbl>, prop_skilled <dbl>,
## # prop_student <dbl>, prop_unskilled <dbl>, prop_NA <dbl>
## # A tibble: 2 × 11
## # Groups: language [2]
## language n_mono_e n_mono_o n_multi_e n_multi_o n_NA prop_mono_e prop_mono_o
## <chr> <int> <int> <int> <int> <int> <dbl> <dbl>
## 1 Emilian 2 261 112 57 2 0.5 60.1
## 2 Esperanto 1 91 33 29 NA 0.6 59.1
## # ℹ 3 more variables: prop_multi_e <dbl>, prop_multi_o <dbl>, prop_NA <dbl>
# Same per-language tables of counts (`n_*`) and percentages (`prop_*`), but
# restricted to respondents whose `comprehend` AND `speak` values are both
# one of "50/50", "G" or "VG".
columns <- c("gender", "age_2", "profession_2", "languages_family_2")
for (column in columns) {
  emilianto %>%
    filter(
      comprehend %in% c("50/50", "G", "VG"),
      speak %in% c("50/50", "G", "VG")
    ) %>%
    group_by(language) %>%
    count(.data[[column]]) %>%
    mutate(
      # Still grouped by language: percentages are relative to the filtered
      # total of each language.
      prop = round(n / sum(n, na.rm = TRUE) * 100, 1)
    ) %>%
    # `names_from` is a tidyselect argument, where `.data[[column]]` is
    # deprecated; all_of() selects the column named by the string instead.
    pivot_wider(names_from = all_of(column), values_from = c(n, prop)) %>%
    print()
}
## # A tibble: 2 × 7
## # Groups: language [2]
## language n_F n_M `n_LGBTQ+` prop_F prop_M `prop_LGBTQ+`
## <chr> <int> <int> <int> <dbl> <dbl> <dbl>
## 1 Emilian 160 100 NA 61.5 38.5 NA
## 2 Esperanto 28 112 4 19.4 77.8 2.8
## # A tibble: 2 × 9
## # Groups: language [2]
## language `n_0-20` `n_21-40` `n_41-60` `n_61+` `prop_0-20` `prop_21-40`
## <chr> <int> <int> <int> <int> <dbl> <dbl>
## 1 Emilian 63 113 62 22 24.2 43.5
## 2 Esperanto 6 36 48 54 4.2 25
## # ℹ 2 more variables: `prop_41-60` <dbl>, `prop_61+` <dbl>
## # A tibble: 2 × 11
## # Groups: language [2]
## language `n_not in work` n_skilled n_student n_unskilled n_NA
## <chr> <int> <int> <int> <int> <int>
## 1 Emilian 29 61 88 80 2
## 2 Esperanto 2 60 18 20 44
## # ℹ 5 more variables: `prop_not in work` <dbl>, prop_skilled <dbl>,
## # prop_student <dbl>, prop_unskilled <dbl>, prop_NA <dbl>
## # A tibble: 2 × 11
## # Groups: language [2]
## language n_mono_e n_mono_o n_multi_e n_multi_o n_NA prop_mono_e prop_mono_o
## <chr> <int> <int> <int> <int> <int> <dbl> <dbl>
## 1 Emilian 2 157 70 29 2 0.8 60.4
## 2 Esperanto 1 82 33 28 NA 0.7 56.9
## # ℹ 3 more variables: prop_multi_e <dbl>, prop_multi_o <dbl>, prop_NA <dbl>
# Tables split by competence group: for each demographic variable, print the
# count and percentage of every category per language and `und_speak` group.
columns <- c("gender", "age_2", "profession_2", "languages_family_2")

# `und_speak` is ">50" when EITHER `comprehend` or `speak` is one of
# "50/50", "G" or "VG", and "<50" otherwise.
emilianto_50 <- emilianto %>%
  mutate(
    und_speak = case_when(
      comprehend %in% c("50/50", "G", "VG") ~ ">50",
      speak %in% c("50/50", "G", "VG") ~ ">50",
      TRUE ~ "<50"
    )
  )

for (column in columns) {
  emilianto_50 %>%
    # `tot` = number of respondents of the row's language; it is the
    # denominator of every percentage below.
    group_by(language) %>%
    add_count(name = "tot") %>%
    ungroup() %>%
    count(.data[[column]], und_speak, language, tot) %>%
    mutate(
      prop = round((n / tot) * 100, 1)
    ) %>%
    # `names_from` is a tidyselect argument, where `.data[[column]]` is
    # deprecated; all_of() selects the column named by the string instead.
    pivot_wider(names_from = all_of(column), values_from = c(n, prop)) %>%
    print()
}
## # A tibble: 4 × 9
## und_speak language tot n_F `n_LGBTQ+` n_M prop_F `prop_LGBTQ+` prop_M
## <chr> <chr> <int> <int> <int> <int> <dbl> <dbl> <dbl>
## 1 <50 Emilian 434 24 NA 13 5.5 NA 3
## 2 <50 Esperanto 154 2 NA 3 1.3 NA 1.9
## 3 >50 Emilian 434 245 NA 152 56.5 NA 35
## 4 >50 Esperanto 154 30 4 115 19.5 2.6 74.7
## # A tibble: 4 × 11
## und_speak language tot `n_0-20` `n_21-40` `n_41-60` `n_61+` `prop_0-20`
## <chr> <chr> <int> <int> <int> <int> <int> <dbl>
## 1 <50 Emilian 434 7 23 7 NA 1.6
## 2 >50 Emilian 434 102 168 99 28 23.5
## 3 >50 Esperanto 154 8 37 50 54 5.2
## 4 <50 Esperanto 154 NA 2 3 NA NA
## # ℹ 3 more variables: `prop_21-40` <dbl>, `prop_41-60` <dbl>, `prop_61+` <dbl>
## # A tibble: 4 × 13
## und_speak language tot `n_not in work` n_skilled n_student n_unskilled n_NA
## <chr> <chr> <int> <int> <int> <int> <int> <int>
## 1 <50 Emilian 434 2 9 15 11 NA
## 2 <50 Esperan… 154 2 1 NA NA 2
## 3 >50 Emilian 434 43 86 138 127 3
## 4 >50 Esperan… 154 2 61 20 21 45
## # ℹ 5 more variables: `prop_not in work` <dbl>, prop_skilled <dbl>,
## # prop_student <dbl>, prop_unskilled <dbl>, prop_NA <dbl>
## # A tibble: 4 × 13
## und_speak language tot n_mono_e n_mono_o n_multi_e n_multi_o n_NA
## <chr> <chr> <int> <int> <int> <int> <int> <int>
## 1 >50 Emilian 434 2 241 102 50 2
## 2 >50 Esperanto 154 1 86 33 29 NA
## 3 <50 Emilian 434 NA 20 10 7 NA
## 4 <50 Esperanto 154 NA 5 NA NA NA
## # ℹ 5 more variables: prop_mono_e <dbl>, prop_mono_o <dbl>, prop_multi_e <dbl>,
## # prop_multi_o <dbl>, prop_NA <dbl>
# Proportion of each competence level for comprehension and speaking, one
# facet per language; the figure is then written to ./img/competence.png.
emilianto %>%
  pivot_longer(
    cols = c("comprehend", "speak"),
    names_to = "competence",
    values_to = "level"
  ) %>%
  mutate(
    # Fix the left-to-right order of the two bars.
    competence = factor(competence, levels = c("comprehend", "speak"))
  ) %>%
  ggplot(mapping = aes(x = competence, fill = level)) +
  geom_bar(position = "fill") +
  scale_fill_brewer(type = "div", palette = "PRGn") +
  facet_grid(cols = vars(language)) +
  labs(y = "Proportion")

# ggsave() without a `plot` argument saves the most recent ggplot.
ggsave(filename = "./img/competence.png", width = 7, height = 5)
# Violin plots of `proportion` by gender, faceted by `spaces` (rows) and
# language (columns), each with a thin black box plot and a white median dot.
# The figure is then written to ./img/spaces.png.
ggplot(
  data = pa_spaces,
  mapping = aes(x = gender, y = proportion, fill = gender)
) +
  # Reference lines at the two ends of the proportion scale.
  geom_hline(yintercept = c(0, 1), size = 0.25) +
  geom_violin(colour = NA, bw = 0.1) +
  geom_boxplot(width = 0.05, fill = "black", colour = "black") +
  stat_summary(colour = "white", fun = "median", geom = "point") +
  ylim(0, 1) +
  # NOTE(review): the labeller is built from a list keyed by "competence",
  # which is not one of the facet variables (spaces, language) - confirm it
  # has the intended effect on the strip labels.
  facet_grid(
    rows = vars(spaces),
    cols = vars(language),
    labeller = as_labeller(list("competence" = c("a", "b")))
  ) +
  scale_fill_brewer(type = "qual", palette = "Dark2") +
  labs(x = "Gender", y = "Spaces (proportion)") +
  theme_minimal(base_size = 16) +
  theme(legend.position = "none")

# ggsave() without a `plot` argument saves the most recent ggplot.
ggsave(filename = "./img/spaces.png", width = 7, height = 5)
# Violin plots of `proportion` by space type, one facet per language, each
# with a thin black box plot and a white median dot. The figure is then
# written to ./img/space-types.png.
ggplot(
  data = ty_spaces,
  mapping = aes(x = types, y = proportion, fill = types)
) +
  # Reference lines at the two ends of the proportion scale.
  geom_hline(yintercept = c(0, 1), size = 0.25) +
  geom_violin(colour = NA) +
  geom_boxplot(width = 0.05, fill = "black", colour = "black") +
  stat_summary(colour = "white", fun = "median", geom = "point") +
  ylim(0, 1) +
  facet_grid(cols = vars(language)) +
  scale_fill_brewer(type = "qual", palette = "Dark2") +
  labs(x = "Space type", y = "Total spaces (proportion)") +
  theme_minimal(base_size = 16) +
  theme(legend.position = "none")

# ggsave() without a `plot` argument saves the most recent ggplot.
ggsave(filename = "./img/space-types.png", width = 7, height = 5)
# Number of distinct respondent ids per language in `ty_spaces`.
ty_spaces %>%
  distinct(language, id) %>%
  count(language)
## # A tibble: 2 × 2
## language n
## <chr> <int>
## 1 Emilian 434
## 2 Esperanto 150
# Percentage of spaces used, per language and space type.
#
# `n_spaces` holds one hard-coded denominator per `*_spaces` column
# (presumably the number of spaces of that type a respondent could report -
# TODO confirm). The values were previously attached to the summary purely by
# position, relying on summarise() returning rows sorted by language and then
# alphabetically by space type. The names below spell out that same mapping
# so the denominators are matched by name instead of by row order.
# NOTE(review): the name-to-value mapping is inferred from that sort order;
# confirm it against the questionnaire.
n_spaces <- c(
  active_spaces = 42,
  media_spaces = 2,
  other_spaces = 2,
  passive_spaces = 13,
  physical_spaces = 23,
  press_spaces = 12,
  virtual_spaces = 11
)

# Hard-coded number of respondents per language.
n_respondents <- c(Emilian = 434, Esperanto = 150)

# Maximum possible totals per space type, kept for each language.
n_spaces_em <- n_spaces * n_respondents[["Emilian"]]
n_spaces_eo <- n_spaces * n_respondents[["Esperanto"]]

# Observed totals: one row per respondent, stacked by space type and summed
# within language and space type.
ty_spaces_count <- ty_spaces %>%
  dplyr::select(language, id, ends_with("_spaces")) %>%
  distinct() %>%
  pivot_longer(
    passive_spaces:other_spaces,
    names_to = "space_type",
    values_to = "count"
  ) %>%
  group_by(language, space_type) %>%
  summarise(
    total_count = sum(count),
    .groups = "drop"
  )

# Fail loudly if the data contain a language or space type without a
# denominator, rather than silently producing misaligned percentages.
stopifnot(
  all(ty_spaces_count$space_type %in% names(n_spaces)),
  all(as.character(ty_spaces_count$language) %in% names(n_respondents))
)

ty_spaces_count <- ty_spaces_count %>%
  mutate(
    # `.env$` picks the lookup vectors defined above, not a data column.
    n_spaces = unname(
      .env$n_spaces[space_type] * .env$n_respondents[as.character(language)]
    )
  )

ty_spaces_perc <- ty_spaces_count %>%
  mutate(
    perc = round((total_count / n_spaces) * 100)
  ) %>%
  # Shorten e.g. "media_spaces" to "media" for plotting.
  mutate(space_type = str_remove(space_type, "_spaces"))
# Bar chart of the percentage per space type, one facet per language. The
# "passive" and "active" rows are left out and the remaining types are shown
# in a fixed order. The figure is then written to ./img/space-types-perc.png.
ty_spaces_perc %>%
  filter(!space_type %in% c("passive", "active")) %>%
  mutate(
    space_type = factor(
      space_type,
      levels = c("physical", "media", "press", "virtual", "other")
    )
  ) %>%
  ggplot(mapping = aes(x = space_type, y = perc, fill = space_type)) +
  geom_col() +
  scale_fill_brewer(type = "qual", palette = "Dark2") +
  facet_grid(cols = vars(language)) +
  ylim(0, 100) +
  labs(x = "Space type", y = "Percent") +
  theme(legend.position = "none")

# ggsave() without a `plot` argument saves the most recent ggplot.
ggsave(filename = "./img/space-types-perc.png", width = 7, height = 5)
# Counts of each score (1-5) per language and attitude item, reshaped to one
# column per score and relabelled with the agreement wording used in the plot.
emilianto_lik <- emilianto_attitude %>%
  dplyr::select(language, educated:familiar) %>%
  pivot_longer(educated:familiar, names_to = "quality", values_to = "score") %>%
  count(language, quality, score) %>%
  pivot_wider(
    names_from = "score",
    values_from = n,
    # Keep the score columns in 1..5 order even if the first language/item
    # combination lacks one of the scores.
    names_sort = TRUE,
    # A score nobody chose for a given language/item is a count of 0, not NA.
    values_fill = 0L
  ) %>%
  rename(
    "strong disagree" = `1`,
    "disagree" = `2`,
    "neither" = `3`,
    "agree" = `4`,
    "strong agree" = `5`
  )

# Diverging stacked bar chart of the scores, one panel per language, written
# to ./img/likert.png.
png("./img/likert.png", width = 7, height = 5, units = "in", res = 600)
# Explicit print(): the plot object is only drawn automatically when the call
# is evaluated at top level, which is not the case under e.g. source().
print(
  likert(
    quality ~ . | language,
    emilianto_lik,
    as.percent = TRUE,
    main = ""
  )
)
dev.off()
## quartz_off_screen
## 2